## Plot Balance and Precision statistics
## 9 April 2012
## 13 June 2012

setwd("blockSeqDataverse/")
load("data/balprecAllMVN.RData")

library(ggplot2)
library(plyr)

addSBminusStat <- function(balprecData){
  tmp <- ddply(balprecData, .(sim.sb, stat.name), transform, sbMinusStatVal=stat.val[rand %in% "sb"]-stat.val)
  return(tmp)
}

balprec.r0 <- addSBminusStat(balprec.r0)
balprec.r8 <- addSBminusStat(balprec.r8)
balprec.r6outlier2 <- addSBminusStat(balprec.r6outlier2)
balprec.r6outlier20 <- addSBminusStat(balprec.r6outlier20)
balprec.r6outlier35 <- addSBminusStat(balprec.r6outlier35)
balprec.r8bimodal <- addSBminusStat(balprec.r8bimodal)

summarizeBalPrecData <- function(balprecData, by.sb = TRUE){
  fmla <- ~ sim.sb + stat.name
  if(by.sb == FALSE){
    fmla <- ~ sim.cr + stat.name
  }
  tmp2 <- ddply(balprecData, fmla, summarise, median=median(sbMinusStatVal[rand %in% "cr"]), min=min(sbMinusStatVal[rand %in% "cr"]), max=max(sbMinusStatVal[rand %in% "cr"]), propSBsmaller = mean(sbMinusStatVal[rand %in% "cr"] < 0))
  ## cuts any "SB minus SB" values
  return(tmp2)
}

bp.summ.r0 <- summarizeBalPrecData(balprec.r0)
bp.summ.r0cr <- summarizeBalPrecData(balprec.r0, by.sb = FALSE)
bp.summ.r8 <- summarizeBalPrecData(balprec.r8)
bp.summ.r8cr <- summarizeBalPrecData(balprec.r8, by.sb = FALSE)
bp.summ.r6o2 <- summarizeBalPrecData(balprec.r6outlier2)
bp.summ.r6o2cr <- summarizeBalPrecData(balprec.r6outlier2, by.sb = FALSE)
bp.summ.r6o20 <- summarizeBalPrecData(balprec.r6outlier20)
bp.summ.r6o20cr <- summarizeBalPrecData(balprec.r6outlier20, by.sb = FALSE)
bp.summ.r6o35 <- summarizeBalPrecData(balprec.r6outlier35)
bp.summ.r6o35cr <- summarizeBalPrecData(balprec.r6outlier35, by.sb = FALSE)
bp.summ.r8bim <- summarizeBalPrecData(balprec.r8bimodal)
bp.summ.r8bimcr <- summarizeBalPrecData(balprec.r8bimodal, by.sb = FALSE)

## In median experiment, blocking produces more balance than:
1-bp.summ.r0[which(bp.summ.r0$median == sort(bp.summ.r0$median[bp.summ.r0$stat.name == "d2.p"])[51]), ]$propSBsmaller
## In median experiment, blocking produces more precision than:
bp.summ.r0[which(bp.summ.r0$median == sort(bp.summ.r0$median[bp.summ.r0$stat.name == "vdh"])[51]), ]$propSBsmaller

source("code/plotSBminusCR.R")

## Figure 1
plotSBminusCR(bp.summ.r0, "d2.p", geom="segs")
plotSBminusCR(bp.summ.r0, "vdh", geom="segs")

## Supplementary Figure 1:
plotSBminusCR(bp.summ.r0cr, "d2.p", geom="segs", by.sb = FALSE)
plotSBminusCR(bp.summ.r0cr, "vdh", geom="segs", by.sb = FALSE)

## Figure 2:
plotSBminusCR(bp.summ.r8, "d2.p", geom="segs")
plotSBminusCR(bp.summ.r8, "vdh", geom="segs")
plotSBminusCR(bp.summ.r8cr, "d2.p", geom="segs", by.sb=FALSE)
plotSBminusCR(bp.summ.r8cr, "vdh", geom="segs", by.sb=FALSE)

## Figure 3:
plotSBminusCR(bp.summ.r6o2, "d2.p", geom="segs")
plotSBminusCR(bp.summ.r6o2, "vdh", geom="segs")
plotSBminusCR(bp.summ.r6o35, "d2.p", geom="segs")
plotSBminusCR(bp.summ.r6o35, "vdh", geom="segs")

## Supplementary Figure 2:
plotSBminusCR(bp.summ.r6o20, "d2.p", geom="segs")
plotSBminusCR(bp.summ.r6o20, "vdh", geom="segs")
plotSBminusCR(bp.summ.r6o20cr, "d2.p", geom="segs", by.sb = FALSE)
plotSBminusCR(bp.summ.r6o20cr, "vdh", geom="segs", by.sb = FALSE)

## Supplementary Figure 3: 
plotSBminusCR(bp.summ.r8bim, "d2.p", geom="segs")
plotSBminusCR(bp.summ.r8bim, "vdh", geom="segs")
plotSBminusCR(bp.summ.r8bimcr, "d2.p", geom="segs", by.sb = FALSE)
plotSBminusCR(bp.summ.r8bimcr, "vdh", geom="segs", by.sb = FALSE)

######### Outlier simulations:
source("code/simulateOutlier.R")
source("code/seqblock1.R")
source("code/seqblock2k.R")
source("code/mahal.R")
source("code/ksTCrealLoop.R")
library(MCMCpack)

simulateOutlier(r=.6, outfileDir="data/simsMVNoutlier/outlier2", whenoutlier=2)
simulateOutlier(r=.6, outfileDir="data/simsMVNoutlier/outlier20", whenoutlier=20)
simulateOutlier(r=.6, outfileDir="data/simsMVNoutlier/outlier35", whenoutlier=35)

out2 <- ksTCrealLoop("data/simsMVNoutlier/outlier2", variable = "cont2")
out20 <- ksTCrealLoop("data/simsMVNoutlier/outlier20", variable = "cont2")
out35 <- ksTCrealLoop("data/simsMVNoutlier/outlier35", variable = "cont2")

## Table 3:
## Inexact replication below, but pattern persists.
mean(out2 < .05)
mean(out2 < .1)
mean(out20 < .05)
mean(out20 < .1)
mean(out35 < .05)
mean(out35 < .1)

## Extreme bimodal data:
## In median experiment, blocking produces more balance than:
1-bp.summ.r8bim[which(bp.summ.r8bim$median == sort(bp.summ.r8bim$median[bp.summ.r8bim$stat.name == "d2.p"])[51]), ]$propSBsmaller
## In median experiment, blocking produces more precision than:
bp.summ.r8bim[which(bp.summ.r8bim$median == sort(bp.summ.r8bim$median[bp.summ.r8bim$stat.name == "vdh"])[51]), ]$propSBsmaller
